2025 - Bibliometric Elderly Malnutrition

Published

Sunday, 23/02/2025

1 Setup

Code
pacman::p_load(tidyverse, bibliometrix, janitor, stringi, summarytools)

2 Import Database and Export

2.1 Search Criteria

  • Database: Scopus
  • Date Accessed: 18/02/2025
  • Search Term:

TITLE-ABS-KEY ( ( "malnutrition" OR "undernutrition" OR "nutritional deficiency" OR "protein-energy malnutrition" OR "nutritional status" ) AND ( "elderly" OR "older adults" OR "aging population" OR "geriatric" OR "frail elderly" ) AND ( "nutrition intervention" OR "dietary intake" OR "sarcopenia" OR "frailty" OR "gut microbiota" OR "muscle loss" OR "health outcomes" ) ) AND PUBYEAR > 1994 AND PUBYEAR < 2025 AND ( LIMIT-TO ( DOCTYPE , "ar" ) ) AND ( LIMIT-TO ( SRCTYPE , "j" ) ) AND ( LIMIT-TO ( PUBSTAGE , "final" ) )

2.2 Import Database

Code
# Import the Scopus CSV export as a bibliometrix data frame and keep a single
# record per title (`distinct()` retains the first row for each `TI`).
eldmln_ds <- convert2df(
  "250218_ScopusSearch.csv",
  dbsource = "scopus",
  format = "csv"
) %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  distinct(TI, .keep_all = TRUE)

# Preview the first records.
eldmln_ds %>%
  head()

# Cache the de-duplicated dataset so later chunks can reload it from disk.
write_rds(eldmln_ds, "eldmln_ds.rds")
Code
# Reload the cached, de-duplicated dataset from disk.
eldmln_ds <- read_rds(file = "eldmln_ds.rds")
Code
# Show the first records of the dataset.
head(eldmln_ds)
Code
# List the column names of the dataset.
names(eldmln_ds)
 [1] "AU"                         "AF"                        
 [3] "Author.s..ID"               "TI"                        
 [5] "PY"                         "SO"                        
 [7] "VL"                         "IS"                        
 [9] "Art..No."                   "Page.start"                
[11] "Page.end"                   "PP"                        
[13] "TC"                         "DI"                        
[15] "URL"                        "Affiliations"              
[17] "C1"                         "AB"                        
[19] "DE"                         "ID"                        
[21] "Molecular.Sequence.Numbers" "Chemicals.CAS"             
[23] "Tradenames"                 "Manufacturers"             
[25] "FU"                         "FX"                        
[27] "CR"                         "RP"                        
[29] "Editors"                    "PU"                        
[31] "Sponsors"                   "Conference.name"           
[33] "Conference.date"            "Conference.location"       
[35] "Conference.code"            "ISSN"                      
[37] "ISBN"                       "CODEN"                     
[39] "PubMed.ID"                  "LA"                        
[41] "JI"                         "DT"                        
[43] "Publication.Stage"          "OA"                        
[45] "DB"                         "UT"                        
[47] "C1raw"                      "J9"                        
[49] "AU_UN"                      "AU1_UN"                    
[51] "AU_UN_NR"                   "SR_FULL"                   
[53] "SR"                        

Key Column Names

  • AU : Authors
  • AF : Authors Full Name
  • Author.s..ID : Author Scopus IDs (keep in view; to be renamed)
  • SO : Source (i.e., Journal Name)
  • DE : Author Keywords
  • ID : Scopus Keywords

2.3 Explore Dataset

Duplicate Title

Code
# Titles that occur on more than one record.
dup_ti <- with(count(eldmln_ds, TI), TI[n > 1])

# Show every record carrying one of those titles, ordered by title.
arrange(
  filter(eldmln_ds, TI %in% dup_ti),
  TI
)

Author

Code
# Records whose `AF` field holds exactly one author: authors are separated by
# ";", so the author count is the number of separators plus one.
filter(
  mutate(eldmln_ds, author_count = stri_count_fixed(AF, ";") + 1),
  author_count == 1
)
Code
# Author names that appear with more than one Scopus ID.
#
# `AF` is split on ";" into one row per author. Splitting into rows (instead
# of a fixed set of 10 columns) keeps every author of a record; the previous
# 10-column split silently discarded authors from position 11 onwards.
eldmln_ds %>%
  select(TI, AF) %>%
  separate_longer_delim(AF, delim = ";") %>%
  rename(au_nameid = AF) %>%
  drop_na(au_nameid) %>%
  mutate(
    au_nameid = str_trim(au_nameid),
    # Entries without a "(...)" part get a literal " (NA)" placeholder so the
    # ID extraction below yields the string "NA" for them.
    au_nameid = if_else(
      str_detect(au_nameid, "\\(.+\\)"),
      au_nameid,
      paste0(au_nameid, " (NA)")
    ),
    # Name: everything before the first "(".
    au_name = str_extract(au_nameid, "^[^(]+") %>% str_trim(),
    # Scopus ID: the text inside the first pair of parentheses.
    au_scid = str_extract(au_nameid, "(?<=\\().+?(?=\\))")
  ) %>%
  distinct(au_name, au_scid) %>%
  group_by(au_name) %>%
  filter(n_distinct(au_scid) > 1) %>%
  arrange(au_name, au_scid) %>%
  ungroup()
Code
# Scopus IDs that appear with more than one author name.
#
# `AF` is split on ";" into one row per author. Splitting into rows (instead
# of a fixed set of 10 columns) keeps every author of a record; the previous
# 10-column split silently discarded authors from position 11 onwards.
eldmln_ds %>%
  select(TI, AF) %>%
  separate_longer_delim(AF, delim = ";") %>%
  rename(au_nameid = AF) %>%
  drop_na(au_nameid) %>%
  mutate(
    au_nameid = str_trim(au_nameid),
    # Entries without a "(...)" part get a literal " (NA)" placeholder so the
    # ID extraction below yields the string "NA" for them.
    au_nameid = if_else(
      str_detect(au_nameid, "\\(.+\\)"),
      au_nameid,
      paste0(au_nameid, " (NA)")
    ),
    # Name: everything before the first "(".
    au_name = str_extract(au_nameid, "^[^(]+") %>% str_trim(),
    # Scopus ID: the text inside the first pair of parentheses.
    au_scid = str_extract(au_nameid, "(?<=\\().+?(?=\\))")
  ) %>%
  distinct(au_name, au_scid) %>%
  group_by(au_scid) %>%
  filter(n_distinct(au_name) > 1) %>%
  arrange(au_scid, au_name) %>%
  ungroup()

Year

Code
# Number of records per publication year.
count(eldmln_ds, PY)

Document Type

Code
# Number of records per document type.
count(eldmln_ds, DT)

3 Analysis

3.1 Bibliometric Summary

Code
# Run the bibliometrix descriptive analysis; multi-valued fields are split
# on ";".
eldmln_bibres <- biblioAnalysis(M = eldmln_ds, sep = ";")

# Cache the analysis result on disk.
write_rds(x = eldmln_bibres, file = "eldmln_bibres.rds")
Code
# Reload the cached bibliometric analysis result.
eldmln_bibres <- read_rds(file = "eldmln_bibres.rds")
Code
# Print the summary tables of the bibliometric analysis.
summary(object = eldmln_bibres)


MAIN INFORMATION ABOUT DATA

 Timespan                              1995 : 2024 
 Sources (Journals, Books, etc)        1144 
 Documents                             4567 
 Annual Growth Rate %                  13.31 
 Document Average Age                  7.33 
 Average citations per doc             31.76 
 Average citations per year per doc    3.318 
 References                            161427 
 
DOCUMENT TYPES                     
 article      4567 
 
DOCUMENT CONTENTS
 Keywords Plus (ID)                    11981 
 Author's Keywords (DE)                6100 
 
AUTHORS
 Authors                               22474 
 Author Appearances                    33176 
 Authors of single-authored docs       157 
 
AUTHORS COLLABORATION
 Single-authored docs                  171 
 Documents per Author                  0.203 
 Co-Authors per Doc                    7.26 
 International co-authorships %        16.68 
 

Annual Scientific Production

 Year    Articles
    1995       15
    1996       20
    1997       19
    1998       29
    1999       28
    2000       36
    2001       42
    2002       32
    2003       39
    2004       45
    2005       36
    2006       47
    2007       56
    2008       57
    2009       46
    2010       51
    2011       61
    2012       74
    2013      123
    2014      146
    2015      178
    2016      163
    2017      241
    2018      290
    2019      327
    2020      411
    2021      484
    2022      451
    2023      458
    2024      562

Annual Percentage Growth Rate 13.31 


Most Productive Authors

   Authors        Articles  Authors        Articles Fractionalized
1   MAEDA K             40 MAEDA K                            7.63
2   VELLAS B            37 WAKABAYASHI H                      7.34
3   VOLKERT D           35 VOLKERT D                          7.19
4   ZHANG Y             35 VELLAS B                           6.36
5   WAKABAYASHI H       34 DE GROOT LCPGM                     5.40
6   LI Y                32 ZHANG Y                            4.88
7   CEDERHOLM T         29 LI Y                               4.80
8   CESARI M            29 VAN STAVEREN WA                    4.79
9   ZHANG X             27 CHEN L-K                           4.60
10  CHEN L-K            26 WON CW                             4.40


Top manuscripts per citations

                           Paper                                    DOI   TC TCperYear   NTC
1  LIM SS, 2012, LANCET                   10.1016/S0140-6736(12)61766-8 9581     684.4 30.94
2  VOS T, 2012, LANCET                    10.1016/S0140-6736(12)61729-2 6480     462.9 20.93
3  CLAESSON MJ, 2012, NATURE              10.1038/nature11319           2555     182.5  8.25
4  FIELDING RA, 2011, J AM MED DIR ASSOC  10.1016/j.jamda.2011.01.003   2461     164.1 25.92
5  KWEON S, 2014, INT J EPIDEMIOL         10.1093/ije/dyt228            1653     137.8 22.20
6  FOREMAN KJ, 2018, LANCET               10.1016/S0140-6736(18)31694-5 1620     202.5 47.67
7  CRUZ-JENTOFT AJ, 2014, AGE AGEING      10.1093/ageing/afu115         1447     120.6 19.44
8  MUSCARITOLI M, 2010, CLIN NUTR         10.1016/j.clnu.2009.12.004    1333      83.3 20.25
9  DEUTZ NEP, 2014, CLIN NUTR             10.1016/j.clnu.2014.04.007    1151      95.9 15.46
10 KELAIDITI E, 2013, J NUTR HEALTH AGING 10.1007/s12603-013-0367-2      757      58.2 16.25


Corresponding Author's Countries

          Country Articles   Freq SCP MCP MCP_Ratio
1  CHINA               471 0.1189 431  40    0.0849
2  JAPAN               440 0.1111 420  20    0.0455
3  USA                 322 0.0813 281  41    0.1273
4  SPAIN               240 0.0606 205  35    0.1458
5  ITALY               224 0.0566 164  60    0.2679
6  UNITED KINGDOM      183 0.0462 133  50    0.2732
7  AUSTRALIA           176 0.0444 140  36    0.2045
8  FRANCE              172 0.0434 144  28    0.1628
9  NETHERLANDS         172 0.0434 125  47    0.2733
10 KOREA               150 0.0379 139  11    0.0733


SCP: Single Country Publications

MCP: Multiple Country Publications


Total Citations per Country

           Country      Total Citations Average Article Citations
1  USA                            36045                    111.94
2  JAPAN                           9649                     21.93
3  ITALY                           7665                     34.22
4  CHINA                           6776                     14.39
5  UNITED KINGDOM                  6096                     33.31
6  NETHERLANDS                     6058                     35.22
7  SPAIN                           5228                     21.78
8  FRANCE                          5079                     29.53
9  AUSTRALIA                       4904                     27.86
10 KOREA                           4662                     31.08


Most Relevant Sources

                                                      Sources        Articles
1  NUTRIENTS                                                              303
2  JOURNAL OF NUTRITION HEALTH AND AGING                                  241
3  CLINICAL NUTRITION                                                     113
4  BMC GERIATRICS                                                          98
5  CLINICAL NUTRITION ESPEN                                                79
6  GERIATRICS AND GERONTOLOGY INTERNATIONAL                                67
7  AGING CLINICAL AND EXPERIMENTAL RESEARCH                                64
8  ARCHIVES OF GERONTOLOGY AND GERIATRICS                                  64
9  EUROPEAN JOURNAL OF CLINICAL NUTRITION                                  63
10 INTERNATIONAL JOURNAL OF ENVIRONMENTAL RESEARCH AND PUBLIC HEALTH       62


Most Relevant Keywords

   Author Keywords (DE)      Articles Keywords-Plus (ID)     Articles
1         FRAILTY                 914   AGED                     7019
2         MALNUTRITION            790   FEMALE                   5932
3         SARCOPENIA              779   MALE                     5777
4         ELDERLY                 674   NUTRITIONAL STATUS       4413
5         OLDER ADULTS            408   HUMAN                    4185
6         NUTRITION               386   ARTICLE                  3595
7         NUTRITIONAL STATUS      350   HUMANS                   3408
8         AGING                   224   MALNUTRITION             3176
9         AGED                    206   FRAILTY                  2363
10        MORTALITY               176   GERIATRIC ASSESSMENT     2337

3.2 Production

3.2.1 Summary

Code
# Publication counts for each ten-year period, followed by an overall total.
eldmln_ds %>%
  count(PY) %>%
  mutate(
    Gap = case_when(
      PY %in% 1995:2004 ~ "1995-2004",
      PY %in% 2005:2014 ~ "2005-2014",
      PY %in% 2015:2024 ~ "2015-2024"
    )
  ) %>%
  group_by(Gap) %>%
  summarise(n = sum(n)) %>%
  # Append a total row computed from the per-period counts.
  {
    bind_rows(., tibble(Gap = "1995-2024 (total)", n = sum(.$n)))
  }

3.2.2 Trend

30 years

Code
# Geometric mean of the year-over-year growth rate (%) across all years.
gm_agr_9524 <- count(eldmln_ds, PY) %>%
  # Percentage change against the previous row; the first year has none.
  mutate(AGR = (n - lag(n)) / lag(n) * 100) %>%
  drop_na(AGR) %>%
  pull(AGR) %>%
  # Average the log growth factors, then convert back to a percentage.
  {
    (exp(mean(log(1 + . / 100))) - 1) * 100
  }

gm_agr_9524
[1] 13.3088

1995 - 2004

Code
# Geometric mean of the year-over-year growth rate (%) within 1995-2004.
gm_agr_9504 <- count(eldmln_ds, PY) %>%
  filter(PY %in% 1995:2004) %>%
  # Percentage change against the previous row; the first year has none.
  mutate(AGR = (n - lag(n)) / lag(n) * 100) %>%
  drop_na(AGR) %>%
  pull(AGR) %>%
  # Average the log growth factors, then convert back to a percentage.
  {
    (exp(mean(log(1 + . / 100))) - 1) * 100
  }

gm_agr_9504
[1] 12.9831

2005 - 2014

Code
# Geometric mean of the year-over-year growth rate (%) within 2005-2014.
gm_agr_0514 <- count(eldmln_ds, PY) %>%
  filter(PY %in% 2005:2014) %>%
  # Percentage change against the previous row; the first year has none.
  mutate(AGR = (n - lag(n)) / lag(n) * 100) %>%
  drop_na(AGR) %>%
  pull(AGR) %>%
  # Average the log growth factors, then convert back to a percentage.
  {
    (exp(mean(log(1 + . / 100))) - 1) * 100
  }

gm_agr_0514
[1] 16.83182

2015 - 2024

Code
# Geometric mean of the year-over-year growth rate (%) within 2015-2024.
gm_agr_1524 <- count(eldmln_ds, PY) %>%
  filter(PY %in% 2015:2024) %>%
  # Percentage change against the previous row; the first year has none.
  mutate(AGR = (n - lag(n)) / lag(n) * 100) %>%
  drop_na(AGR) %>%
  pull(AGR) %>%
  # Average the log growth factors, then convert back to a percentage.
  {
    (exp(mean(log(1 + . / 100))) - 1) * 100
  }

gm_agr_1524
[1] 13.62649
Code
# Palette reference:
#   #4682B4 steelblue
#   #CD5C5C indianred
#   #2E8B57 seagreen

# Bar chart of yearly publication counts. Dashed lines at 2004 and 2014
# separate the three periods; each text label reports a geometric-mean annual
# growth rate computed in the preceding chunks.
ggplot(count(eldmln_ds, PY), aes(x = PY, y = n)) +
  geom_vline(
    xintercept = c(2004, 2014),
    linetype = "dashed",
    color = "#CD5C5C"
  ) +
  geom_col(fill = "#4682B4", color = "black") +
  # Whole span, 1995-2024
  annotate(
    "text", x = 2006, y = 600, hjust = 0, size = 3, color = "black",
    label = sprintf("1995-2024 AGR = %.1f%%", gm_agr_9524)
  ) +
  # 1995-2004
  annotate(
    "text", x = 1996, y = 100, hjust = 0, size = 3, color = "black",
    label = sprintf("1995-2004 AGR = %.1f%%", gm_agr_9504)
  ) +
  # 2005-2014
  annotate(
    "text", x = 2006, y = 200, hjust = 0, size = 3, color = "black",
    label = sprintf("2005-2014 AGR = %.1f%%", gm_agr_0514)
  ) +
  # 2015-2024
  annotate(
    "text", x = 2016, y = 550, hjust = 0, size = 3, color = "black",
    label = sprintf("2015-2024 AGR = %.1f%%", gm_agr_1524)
  ) +
  scale_x_continuous(breaks = seq(1989, 2029, by = 5)) +
  scale_y_continuous(breaks = seq(0, 600, by = 100)) +
  coord_cartesian(ylim = c(0, 620)) +
  labs(
    title = "Annual Publication Count",
    x = "Publication Year",
    y = "Number of Publications"
  ) +
  theme_bw()

3.3 Journal

3.3.1 Summary

Code
# Number of distinct sources (journals) in the dataset.
summarise(eldmln_ds, total_unique_journals = n_distinct(SO))
Code
# Ten sources with the most records (ties at the cut-off are all kept).
slice_max(
  count(eldmln_ds, SO, sort = TRUE),
  order_by = n,
  n = 10
)

3.3.2 Trend

Code
# Five sources with the most records published in 1995-2004.
slice_max(
  count(filter(eldmln_ds, PY %in% 1995:2004), SO, sort = TRUE),
  order_by = n,
  n = 5
)
Code
# Five sources with the most records published in 2005-2014.
slice_max(
  count(filter(eldmln_ds, PY %in% 2005:2014), SO, sort = TRUE),
  order_by = n,
  n = 5
)
Code
# Five sources with the most records published in 2015-2024.
slice_max(
  count(filter(eldmln_ds, PY %in% 2015:2024), SO, sort = TRUE),
  order_by = n,
  n = 5
)

3.4 Author

Code
# One row per (record, author) with a normalised author name and Scopus ID.
#
# `AF` is split on ";" into rows rather than into a fixed set of 10 columns,
# so authors beyond the 10th position are no longer discarded. The output
# columns are unchanged: TI, PY, author_position ("af1", "af2", ...),
# au_nameid, au_name, au_scid.
oriau_byti0 <- eldmln_ds %>%
  select(TI, PY, AF) %>%
  # Per-record key so author positions are numbered within each record.
  mutate(doc_row = row_number()) %>%
  separate_longer_delim(AF, delim = ";") %>%
  rename(au_nameid = AF) %>%
  filter(!is.na(au_nameid)) %>%
  group_by(doc_row) %>%
  mutate(author_position = paste0("af", row_number())) %>%
  ungroup() %>%
  select(TI, PY, author_position, au_nameid) %>%
  mutate(
    au_nameid = str_trim(au_nameid),
    # Name: everything before the first "(".
    au_name = str_extract(au_nameid, "^[^(]+"),
    au_name = str_to_upper(str_trim(au_name)),
    # Strip diacritics, then turn hyphens, commas and periods into spaces.
    au_name = stri_trans_general(au_name, "Latin-ASCII"),
    au_name = str_replace_all(au_name, "[-,.]", " "),
    # Trim the ends and collapse internal runs of whitespace.
    au_name = str_squish(au_name),
    # Scopus ID: the text inside the first pair of parentheses (NA if none).
    au_scid = str_extract(au_nameid, "(?<=\\().+?(?=\\))")
  ) %>%
  arrange(au_scid)

oriau_byti0
Code
# Step 1: one standardized name per Scopus ID, preferring the longest spelling
# (ties broken by publication year, then author position).
# Rows without a Scopus ID are excluded here: grouping them would place every
# ID-less author in a single NA group and give them all the same name.
stdname_list1 <- oriau_byti0 %>%
  filter(!is.na(au_scid)) %>%
  arrange(au_scid, desc(nchar(au_name)), PY, author_position) %>%
  group_by(au_scid) %>%
  summarise(std_name = first(au_name), .groups = "drop") %>%
  arrange(desc(au_scid))  # Descending order by Scopus ID

# Step 2: merge the standardized name back. Authors without a Scopus ID have
# no match in the lookup and keep their own cleaned name.
oriau_byti1 <- oriau_byti0 %>%
  left_join(stdname_list1, by = "au_scid") %>%
  mutate(std_name = coalesce(std_name, au_name))

# Step 3: one standardized Scopus ID per standardized name. Within a name the
# first non-missing ID (by publication year, then author position) is used;
# the result is NA only when the name never appears with an ID.
stdscid_list1 <- oriau_byti1 %>%
  arrange(desc(nchar(std_name)), PY, author_position) %>%
  group_by(std_name) %>%
  summarise(std_scid = first(au_scid[!is.na(au_scid)]), .groups = "drop") %>%
  arrange(desc(std_name))  # Descending order by name

# Step 4: final merge - add the standardized Scopus IDs back.
oriau_byti2 <- oriau_byti1 %>%
  left_join(stdscid_list1, by = "std_name")

oriau_byti2
Code
# Consistency check: standardized names linked to more than one
# standardized Scopus ID (expected to return no rows).
oriau_byti2 %>%
  distinct(std_name, std_scid) %>%
  count(std_name) %>%
  filter(n > 1)
Code
# Consistency check: standardized Scopus IDs linked to more than one
# standardized name.
oriau_byti2 %>%
  distinct(std_name, std_scid) %>%
  count(std_scid) %>%
  filter(n > 1)
Code
# Spot check: rows whose standardized name contains "bald" (any case).
filter(
  oriau_byti2,
  str_detect(std_name, regex("bald", ignore_case = TRUE))
)
Code
# Spot check: rows whose standardized name contains "bulu" (any case).
filter(
  oriau_byti2,
  str_detect(std_name, regex("bulu", ignore_case = TRUE))
)
Code
# Rows where standardization changed the cleaned author name, by new name.
arrange(
  filter(oriau_byti2, au_name != std_name),
  std_name
)

3.4.1 Summary

Code
# Total author appearances (rows) and number of distinct standardized names.
summarise(
  oriau_byti2,
  total_authors = n(),
  unique_authors = n_distinct(std_name)
)

3.4.2 Trend

Code
# Five standardized author names with the most appearances overall.
oriau_byti2 %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  count(std_name, sort = TRUE) %>%
  slice_head(n = 5)
Code
# Five standardized author names with the most appearances in 1995-2004.
oriau_byti2 %>%
  filter(PY %in% 1995:2004) %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  count(std_name, sort = TRUE) %>%
  slice_head(n = 5)
Code
# Five standardized author names with the most appearances in 2005-2014.
oriau_byti2 %>%
  filter(PY %in% 2005:2014) %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  count(std_name, sort = TRUE) %>%
  slice_head(n = 5)
Code
# Five standardized author names with the most appearances in 2015-2024.
oriau_byti2 %>%
  filter(PY %in% 2015:2024) %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  count(std_name, sort = TRUE) %>%
  slice_head(n = 5)

3.5 Keyword

3.5.1 Author Keywords

Code
# Distribution of the number of author keywords per record: keywords in `DE`
# are separated by ";", so the count is the number of separators plus one.
count(eldmln_ds, keyword_count = str_count(DE, ";") + 1)
Code
# Palette reference:
#   #4682B4 steelblue
#   #CD5C5C indianred
#   #2E8B57 seagreen

# Bar chart of how many records have each author-keyword count.
ggplot(
  count(eldmln_ds, keyword_count = str_count(DE, ";") + 1),
  aes(x = keyword_count, y = n)
) +
  geom_col(fill = "#4682B4", color = "black") +
  scale_x_continuous(breaks = seq(1, 21, by = 1)) +
  scale_y_continuous(breaks = seq(0, 2000, by = 200)) +
  # The view is limited to 1-15 keywords; some papers have more than 15.
  coord_cartesian(xlim = c(1, 15)) +
  labs(
    title = "Number of Author's Keyword per Articles",
    x = "Author's Keywords Count",
    y = "Number of Publications"
  ) +
  theme_bw()

Code
# One row per (record, author keyword) with a normalised keyword.
#
# `DE` is split on ";" into rows rather than into a fixed set of 10 columns,
# so keywords beyond the 10th are kept. Output columns: TI, PY, de_position
# ("de1", "de2", ...), de_aukw.
debyti <- eldmln_ds %>%
  select(TI, PY, DE) %>%
  # Per-record key so keyword positions are numbered within each record.
  mutate(doc_row = row_number()) %>%
  separate_longer_delim(DE, delim = ";") %>%
  rename(de_aukw = DE) %>%
  filter(!is.na(de_aukw)) %>%
  group_by(doc_row) %>%
  mutate(de_position = paste0("de", row_number())) %>%
  ungroup() %>%
  select(TI, PY, de_position, de_aukw) %>%
  mutate(
    de_aukw = str_to_upper(de_aukw),
    # Transliterate before replacing hyphens, so any hyphen the
    # transliteration produces is also turned into a space.
    de_aukw = stri_trans_general(de_aukw, "Latin-ASCII"),
    de_aukw = str_replace_all(de_aukw, "-", " "),
    # Squish last: trims the ends and collapses repeated spaces, including
    # those left behind by the hyphen replacement (e.g. "A - B").
    de_aukw = str_squish(de_aukw)
  ) %>%
  # Drop empty keywords, e.g. from a trailing ";" in `DE`.
  filter(de_aukw != "")

debyti
Code
# Frequency of each normalised author keyword, most frequent first.
debyti %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  count(de_aukw, sort = TRUE)

3.5.2 chatgpt fuzzy matching

Code
library(stringdist)

# Distinct normalised keywords, in order of first appearance.
keywords <- unique(debyti[["de_aukw"]])

# Levenshtein distance between every pair of keywords.
keyword_dist <- stringdistmatrix(a = keywords, b = keywords, method = "lv")

# Pair table: `expand.grid()` varies its first factor fastest, which lines up
# with the column-major order in which `as.vector()` flattens the matrix.
# Keep only pairs of different keywords at most two edits apart.
keyword_pairs <- expand.grid(keyword1 = keywords, keyword2 = keywords) %>%
  as_tibble() %>%
  mutate(distance = as.vector(keyword_dist)) %>%
  filter(keyword1 != keyword2, distance <= 2)

keyword_pairs

# Number of near matches found for each keyword.
count(keyword_pairs, keyword2)
Code
library(quanteda)
library(cluster)
library(dplyr)

# Tokenise each keyword row and build a document-feature matrix.
dfm_keywords <- debyti$de_aukw %>%
  tokens() %>%
  dfm(tolower = TRUE)

# TF-IDF weighting of the document-feature matrix.
dfm_tfidf <- dfm_tfidf(dfm_keywords)

# Dense matrix form, as input for the distance computation.
tfidf_matrix <- convert(dfm_tfidf, to = "matrix")

# Pairwise distances between keyword rows.
dist_matrix <- dist(tfidf_matrix)

# Hierarchical clustering on those distances.
keyword_clusters <- hclust(dist_matrix)

# Cut the tree into 50 clusters (adjust as needed); the cluster label is
# stored on `debyti` itself.
debyti$keyword_cluster <- cutree(keyword_clusters, k = 50)

# Representative keyword per cluster: the first keyword encountered in it.
cluster_mapping <- debyti %>%
  distinct(keyword_cluster, .keep_all = TRUE) %>%
  arrange(keyword_cluster) %>%
  select(keyword_cluster, std_keyword = de_aukw)

# Replace each keyword by its cluster representative, then drop the helper
# columns.
debyti2 <- debyti %>%
  left_join(cluster_mapping, by = "keyword_cluster") %>%
  mutate(de_aukw = coalesce(std_keyword, de_aukw)) %>%
  select(-std_keyword, -keyword_cluster)

# View updated dataset
head(debyti2)